library(ArchR)
## Loading required package: ggplot2
## Loading required package: SummarizedExperiment
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## Loading required package: BiocParallel
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum
## Loading required package: data.table
##
## Attaching package: 'data.table'
## The following object is masked from 'package:SummarizedExperiment':
##
## shift
## The following object is masked from 'package:GenomicRanges':
##
## shift
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:S4Vectors':
##
## expand
## Loading required package: rhdf5
## Loading required package: magrittr
archProj <- loadArchRProject("../data/scATAC/archR_subHybridClusters/",
showLogo = FALSE)
## Successfully loaded ArchRProject!
plotEmbedding(ArchRProj = archProj, colorBy = "cellColData",
name = "predAnn", embedding = "UMAP_cor")
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-4c7899d9c87-Date-2021-11-04_Time-16-54-28.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-4c7899d9c87-Date-2021-11-04_Time-16-54-28.log

archProj <- addMotifAnnotations(ArchRProj = archProj,
motifSet = "cisbp",
name = "Motif",
force=TRUE)
## No methods found in package 'IRanges' for request: 'score' when loading 'TFBSTools'
## ArchR logging to : ArchRLogs/ArchR-addMotifAnnotations-4c784a8f67ff-Date-2021-11-04_Time-16-54-30.log
## If there is an issue, please report to github with logFile!
## peakAnnotation name already exists! Overriding.
## 2021-11-04 16:54:35 : Gettting Motif Set, Species : Mus musculus, 0.001 mins elapsed.
## Using version 2 motifs!
## 2021-11-04 16:54:37 : Finding Motif Positions with motifmatchr!, 0.042 mins elapsed.
## 2021-11-04 16:58:06 : Creating Motif Overlap Matrix, 3.526 mins elapsed.
## 2021-11-04 16:58:11 : Finished Getting Motif Info!, 3.601 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-addMotifAnnotations-4c784a8f67ff-Date-2021-11-04_Time-16-54-30.log
## get ArchR peak markers for motif enrichment
markerPeaks_subHybrid <- getMarkerFeatures(
ArchRProj = archProj,
useMatrix = "PeakMatrix",
groupBy = "predAnn",
bias = c("TSSEnrichment", "log10(nFrags)"),
testMethod = "wilcoxon")
## ArchR logging to : ArchRLogs/ArchR-getMarkerFeatures-4c78cf4bc97-Date-2021-11-04_Time-16-58-15.log
## If there is an issue, please report to github with logFile!
## MatrixClass = Sparse.Integer.Matrix
## 2021-11-04 16:58:15 : Matching Known Biases, 0.005 mins elapsed.
## 2021-11-04 16:58:17 : Computing Pairwise Tests (1 of 5), 0.03 mins elapsed.
## 2021-11-04 16:58:41 : Computing Pairwise Tests (2 of 5), 0.436 mins elapsed.
## 2021-11-04 16:59:06 : Computing Pairwise Tests (3 of 5), 0.848 mins elapsed.
## 2021-11-04 16:59:28 : Computing Pairwise Tests (4 of 5), 1.208 mins elapsed.
## 2021-11-04 16:59:50 : Computing Pairwise Tests (5 of 5), 1.577 mins elapsed.
## ###########
## 2021-11-04 17:00:13 : Completed Pairwise Tests, 1.956 mins elapsed.
## ###########
## ArchR logging successful to : ArchRLogs/ArchR-getMarkerFeatures-4c78cf4bc97-Date-2021-11-04_Time-16-58-15.log
peakMarkers_subHybrid <- getMarkers(markerPeaks_subHybrid,
cutOff = "FDR <= 0.01 & Log2FC >= 1.25",
returnGR = TRUE)
heatmapMarkerPeaks <- plotMarkerHeatmap(
seMarker = markerPeaks_subHybrid,
cutOff = "FDR <= 0.01 & Log2FC >= 1.25",
transpose = TRUE
)
## ArchR logging to : ArchRLogs/ArchR-plotMarkerHeatmap-4c7817896426-Date-2021-11-04_Time-17-00-15.log
## If there is an issue, please report to github with logFile!
## Identified 20694 markers!
## Preparing Main Heatmap..
## ArchR logging successful to : ArchRLogs/ArchR-plotMarkerHeatmap-4c7817896426-Date-2021-11-04_Time-17-00-15.log
ComplexHeatmap::draw(heatmapMarkerPeaks, heatmap_legend_side = "bot", annotation_legend_side = "bot")

Check accessibility of Egfr target genes
egfrTargetDf <- read.csv("../data/genesTargets/egfr_targets_unfiltered.csv")
pCluster <- plotEmbedding(ArchRProj = archProj,
colorBy = "cellColData",
name = "predAnn",
embedding = "UMAP_cor")
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-4c782bb23d88-Date-2021-11-04_Time-17-00-22.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-4c782bb23d88-Date-2021-11-04_Time-17-00-22.log
pCluster

allGenes <- unname(unique(mcols(archProj@geneAnnotation$genes)$symbol))
egfrTargets <- intersect(as.character(egfrTargetDf$target), allGenes)
## get peak count matrix
ArrowFiles <- getArrowFiles(archProj)
featureDF <- ArchR:::.getFeatureDF(head(ArrowFiles, 2), "PeakMatrix")
peakMat <- ArchR:::.getPartialMatrix(ArrowFiles,
featureDF = featureDF, threads = 1, useMatrix = "PeakMatrix",
cellNames = rownames(archProj@cellColData), progress = FALSE)
## 2021-11-04 17:00:23 : Getting Partial Matrix 1 of 6, 0 mins elapsed.
## 2021-11-04 17:00:31 : Getting Partial Matrix 2 of 6, 0.135 mins elapsed.
## 2021-11-04 17:00:34 : Getting Partial Matrix 3 of 6, 0.181 mins elapsed.
## 2021-11-04 17:00:38 : Getting Partial Matrix 4 of 6, 0.251 mins elapsed.
## 2021-11-04 17:00:43 : Getting Partial Matrix 5 of 6, 0.322 mins elapsed.
## 2021-11-04 17:00:46 : Getting Partial Matrix 6 of 6, 0.375 mins elapsed.
## 2021-11-04 17:00:54 : Successfully Created Partial Matrix, 0.513 mins elapsed.
## for each gene, get the peak closest to TSS
peakGR <- archProj@peakSet
tssPeaks <- c()
for(gg in 1:length(egfrTargets)){
curGene <- egfrTargets[gg]
curID <- which(mcols(peakGR)$nearestGene == curGene)
if(length(curID) > 0 ){
curTSSPeak <- curID[which.min(mcols(peakGR)$distToTSS[curID])]
tssPeaks[gg] <- curTSSPeak
} else {
tssPeaks[gg] <- NA
next
}
}
egfrTargets <- egfrTargets[!is.na(tssPeaks)]
tssPeaks <- tssPeaks[!is.na(tssPeaks)]
egfrTargetPeaks <- peakMat[tssPeaks,]
# egfrTargetPeakScaledSum <- colSums(t(scale(t(egfrTargetPeaks))))
egfrTargetPeakScaledSum <- colSums(egfrTargetPeaks)
umapEmbedding <- getEmbedding(archProj, embedding="UMAP_cor")
colnames(umapEmbedding) <- paste0("UMAP",1:2)
umapEmbedding$egfrTargetScaledSum <- egfrTargetPeakScaledSum
pEgfrTargets <- ggplot(umapEmbedding, aes(x=UMAP1,
y=UMAP2,
col=egfrTargetScaledSum)) +
theme_classic() +
geom_point() +
scale_color_gradientn(colors=wesanderson::wes_palette("Zissou1", n=100, type="continuous"))
pEgfrTargets

Check accessibility of peaks containining Ets TFBS motif
motifMatches <- readRDS(archProj@peakAnnotation$Motif$Matches)
etsMotifId <- grep(x=colnames(motifMatches), pattern="Ets")
etsMotifs <- grep(x=colnames(motifMatches), pattern="Ets", value=TRUE)
peaksWithEtsMotifs <- which(rowSums(assays(motifMatches[,etsMotifs])$matches) > 0)
length(peaksWithEtsMotifs)
## [1] 10594
umapEmbedding$etsMotifPeaks <- colSums(peakMat[peaksWithEtsMotifs,])
pEtsMotifs <- ggplot(umapEmbedding, aes(x=UMAP1,
y=UMAP2,
col=etsMotifPeaks)) +
theme_classic() +
geom_point() +
scale_color_gradientn(colors=wesanderson::wes_palette("Zissou1", n=100, type="continuous"))
pEtsMotifs

Association between Egfr targets’ accessibility and Ets motif peaks’ accessibility
plot(umapEmbedding$egfrTargetScaledSum, umapEmbedding$etsMotifPeaks,
xlab = "Egfr targets' accessibility",
ylab = "Ets-motif-containing peaks' accessibility")

Gene markers for (sub)hybrid clusters
plotEmbedding(ArchRProj = archProj, colorBy = "cellColData",
name = "predAnn", embedding = "UMAP_cor")
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-4c786b530a63-Date-2021-11-04_Time-17-01-00.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = cellColData
## Plotting Embedding
## 1
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-4c786b530a63-Date-2021-11-04_Time-17-01-00.log

## get ArchR gene markers for motif enrichment
markerGenes_subHybrid <- getMarkerFeatures(
ArchRProj = archProj,
useMatrix = "GeneScoreMatrix",
groupBy = "predAnn",
bias = c("TSSEnrichment", "log10(nFrags)"),
testMethod = "wilcoxon")
## ArchR logging to : ArchRLogs/ArchR-getMarkerFeatures-4c78466736c0-Date-2021-11-04_Time-17-01-02.log
## If there is an issue, please report to github with logFile!
## MatrixClass = Sparse.Double.Matrix
## 2021-11-04 17:01:02 : Matching Known Biases, 0.001 mins elapsed.
## 2021-11-04 17:01:03 : Computing Pairwise Tests (1 of 5), 0.019 mins elapsed.
## 2021-11-04 17:01:23 : Computing Pairwise Tests (2 of 5), 0.359 mins elapsed.
## 2021-11-04 17:01:44 : Computing Pairwise Tests (3 of 5), 0.701 mins elapsed.
## 2021-11-04 17:02:02 : Computing Pairwise Tests (4 of 5), 1.003 mins elapsed.
## 2021-11-04 17:02:20 : Computing Pairwise Tests (5 of 5), 1.302 mins elapsed.
## ###########
## 2021-11-04 17:02:39 : Completed Pairwise Tests, 1.618 mins elapsed.
## ###########
## ArchR logging successful to : ArchRLogs/ArchR-getMarkerFeatures-4c78466736c0-Date-2021-11-04_Time-17-01-02.log
geneMarkers_subHybrid <- getMarkers(markerGenes_subHybrid,
cutOff = "FDR <= 0.01 & Log2FC >= 1.25")
write.csv(geneMarkers_subHybrid$C2_Hybrid, file="../data/scATAC/markerGenesHybrid.csv")
write.csv(geneMarkers_subHybrid$hybrid1, file="../data/scATAC/markerGenesHybrid1.csv")
write.csv(geneMarkers_subHybrid$hybrid2, file="../data/scATAC/markerGenesHybrid2.csv")
write.csv(geneMarkers_subHybrid$C1_Inj, file="../data/scATAC/markerGenesInjured.csv")
write.csv(geneMarkers_subHybrid$resting, file="../data/scATAC/markerGenesResting.csv")
Marker genes for hybrid1
markerGenesHybrid1 <- geneMarkers_subHybrid$hybrid1$name[1:18]
p <- plotEmbedding(
ArchRProj = archProj,
colorBy = "GeneScoreMatrix",
name = markerGenesHybrid1,
embedding = "UMAP_cor",
quantCut = c(0.01, 0.95),
size=1/3,
plotAs="points"
)
## Getting ImputeWeights
## No imputeWeights found, returning NULL
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-4c787754c692-Date-2021-11-04_Time-17-02-39.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2021-11-04 17:02:39 :
## 1 2 3 4 5 6
## Plotting Embedding
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-4c787754c692-Date-2021-11-04_Time-17-02-39.log
# clean
p2Hybrid1 <- lapply(p, function(x){
x + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6.5) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
})
cowplot::plot_grid(plotlist=p2Hybrid1[1:9])

cowplot::plot_grid(plotlist=p2Hybrid1[10:18])

Injured/uninjured contribution of marker genes
geneScoreMat <- ArchR::getMatrixFromProject(archProj,
useMatrix = "GeneScoreMatrix")
## ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-4c78398a54f3-Date-2021-11-04_Time-17-02-55.log
## If there is an issue, please report to github with logFile!
## 2021-11-04 17:03:12 : Organizing colData, 0.283 mins elapsed.
## 2021-11-04 17:03:12 : Organizing rowData, 0.284 mins elapsed.
## 2021-11-04 17:03:12 : Organizing Assays (1 of 1), 0.284 mins elapsed.
## 2021-11-04 17:03:15 : Constructing SummarizedExperiment, 0.332 mins elapsed.
## 2021-11-04 17:03:16 : Finished Matrix Creation, 0.348 mins elapsed.
geneScoreMatHybrid1 <- geneScoreMat[,colData(geneScoreMat)$predAnn == "hybrid1"]
table(geneScoreMatHybrid1$treat)
##
## Inj UI
## 77 120
rafalib::mypar(mfrow=c(3,3))
for(gg in 1:length(markerGenesHybrid1)){
boxplot(log1p(assays(geneScoreMatHybrid1)$GeneScoreMatrix[which(rowData(geneScoreMatHybrid1)$name == markerGenesHybrid1[gg]),]) ~ geneScoreMatHybrid1$treat,
xlab = "Treatment", ylab = "Log Gene activity score + 1",
main = markerGenesHybrid1[gg])
}


Marker genes for hybrid2
markerGenesHybrid2 <- geneMarkers_subHybrid$hybrid2$name[1:18]
p <- plotEmbedding(
ArchRProj = archProj,
colorBy = "GeneScoreMatrix",
name = markerGenesHybrid2,
embedding = "UMAP_cor",
quantCut = c(0.01, 0.95),
size=1/3,
plotAs="points"
)
## Getting ImputeWeights
## No imputeWeights found, returning NULL
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-4c783d2c9f26-Date-2021-11-04_Time-17-03-16.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2021-11-04 17:03:17 :
## 1 2 3 4 5 6
## Plotting Embedding
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-4c783d2c9f26-Date-2021-11-04_Time-17-03-16.log
# clean
p2Hybrid2 <- lapply(p, function(x){
x + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6.5) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
})
cowplot::plot_grid(plotlist=p2Hybrid2[1:9])

cowplot::plot_grid(plotlist=p2Hybrid2[10:18])

Injured/uninjured contribution of marker genes
geneScoreMatHybrid2 <- geneScoreMat[,colData(geneScoreMat)$predAnn == "hybrid2"]
table(geneScoreMatHybrid2$treat)
##
## Inj UI
## 47 176
rafalib::mypar(mfrow=c(3,3))
for(gg in 1:length(markerGenesHybrid2)){
boxplot(log1p(assays(geneScoreMatHybrid2)$GeneScoreMatrix[which(rowData(geneScoreMatHybrid2)$name == markerGenesHybrid2[gg]),]) ~ geneScoreMatHybrid2$treat,
xlab = "Treatment", ylab = "Log Gene activity score + 1",
main = markerGenesHybrid2[gg])
}


Marker genes for other hybrid cells
markerGenesHybrid <- geneMarkers_subHybrid$C2_Hybrid$name[1:18]
p <- plotEmbedding(
ArchRProj = archProj,
colorBy = "GeneScoreMatrix",
name = markerGenesHybrid,
embedding = "UMAP_cor",
quantCut = c(0.01, 0.95),
size=1/3,
plotAs="points"
)
## Getting ImputeWeights
## No imputeWeights found, returning NULL
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-4c78823aefa-Date-2021-11-04_Time-17-03-34.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2021-11-04 17:03:34 :
## 1 2 3 4 5 6
## Plotting Embedding
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-4c78823aefa-Date-2021-11-04_Time-17-03-34.log
# clean
p2Hybrid <- lapply(p, function(x){
x + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6.5) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
})
cowplot::plot_grid(plotlist=p2Hybrid[1:9])

cowplot::plot_grid(plotlist=p2Hybrid[10:18])

Accesssibility of key lineage-specific genes from other cell types / lineages
# John suggested Kit1, but there's only Kit and Kitl
#
markerGenesLineageSpecific <- c("Sox2", "Ascl1", "Kit", "Neurog1", "Neurod1")
p <- plotEmbedding(
ArchRProj = archProj,
colorBy = "GeneScoreMatrix",
name = markerGenesLineageSpecific,
embedding = "UMAP_cor",
quantCut = c(0.01, 0.95),
size=1/3,
plotAs="points"
)
## Getting ImputeWeights
## No imputeWeights found, returning NULL
## ArchR logging to : ArchRLogs/ArchR-plotEmbedding-4c78254a72a1-Date-2021-11-04_Time-17-04-00.log
## If there is an issue, please report to github with logFile!
## Getting UMAP Embedding
## ColorBy = GeneScoreMatrix
## Getting Matrix Values...
## 2021-11-04 17:04:00 :
## 1 2 3 4 5 6
## Plotting Embedding
## 1 2 3 4 5
## ArchR logging successful to : ArchRLogs/ArchR-plotEmbedding-4c78254a72a1-Date-2021-11-04_Time-17-04-00.log
# clean
p2LineageSpecific <- lapply(p, function(x){
x + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6.5) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
})
cowplot::plot_grid(plotlist=p2LineageSpecific)
